In [7]:
%matplotlib inline
import matplotlib.pyplot as plt

In [9]:
xlow = 0
xhigh = np.pi/2
x = np.linspace(xlow, xhigh, num = 100)
y = np.cos(x)

In [10]:
plt.figure()
plt.plot(x, y)
plt.show()



In [18]:
dim = 100
x_mcmc = np.random.random(dim) * (xhigh - xlow) + xlow
y_mcmc = np.cos(x_mcmc)

In [19]:
plt.figure()
plt.plot(x_mcmc, y_mcmc, 'o')
plt.show()



In [21]:
y_mcmc.mean() * (xhigh - xlow)


Out[21]:
0.99904262914854747

In [ ]:
(xhigh - xlow) * np.sqrt(y_mcmc.

In [1]:
from numpy.random import random

Metropolis-Hastings Algorithm


In [23]:
def sdnorm(z):
    """
    Standard normal pdf (Probability Density Function)
    """
    return np.exp(-z*z/2.)/np.sqrt(2*np.pi)

n = 10000
alpha = 1
x = 0.
vec = []
vec.append(x)
innov = random(n) * 2 * alpha - alpha #random inovation, uniform proposal distribution
for i in xrange(1,n):
    can = x + innov[i] #candidate
    aprob = np.min([1.,sdnorm(can)/sdnorm(x)]) #acceptance probability
    u = random(1)
    if u[0] < aprob:
        x = can
        vec.append(x)

In [24]:
len(vec)


Out[24]:
8066

In [25]:
#plotting the results:
#theoretical curve
x = np.arange(-3,3,.1)
y = sdnorm(x)
plt.subplot(211)
plt.title('Metropolis-Hastings')
plt.plot(vec)
plt.subplot(212)

plt.hist(vec, bins=30,normed=1)
plt.plot(x,y,'ro')
plt.ylabel('Frequency')
plt.xlabel('x')
plt.legend(('PDF','Samples'))
plt.show()


my version of the code


In [41]:
def metropolis_hastings(f, init = 0, nsamples = 1000):
    alpha = 1
    sample_count = 0
    x = init
    vec = []
    vec.append(x)
    while sample_count < nsamples-1:
        can = x + random(1)[0] * 2 * alpha - alpha #candidate
        aprob = np.min([1.,sdnorm(can)/sdnorm(x)]) #acceptance probability
        u = random(1)
        if u[0] < aprob:
            x = can
            vec.append(x)
            sample_count+=1
    return np.array(vec)

In [42]:
vec = metropolis_hastings(sdnorm, nsamples=10000)

In [43]:
len(vec)


Out[43]:
10000

In [44]:
#plotting the results:
#theoretical curve
x = np.arange(-3,3,.1)
y = sdnorm(x)
plt.subplot(211)
plt.title('Metropolis-Hastings')
plt.plot(vec)
plt.subplot(212)

plt.hist(vec, bins=30,normed=1)
plt.plot(x,y,'ro')
plt.ylabel('Frequency')
plt.xlabel('x')
plt.legend(('PDF','Samples'))
plt.show()



In [45]:
def gauss(z):
    """
    Standard normal gaussian
    """
    return np.exp(-z*z/2.)

In [48]:
import scipy.integrate as integrate

In [50]:
integrate.quad(gauss, -np.Inf, np.Inf)


Out[50]:
(2.5066282746309994, 2.5512942192316024e-08)

In [51]:
np.sqrt(2*np.pi)


Out[51]:
2.5066282746310002

In [167]:
def triangle(z):
    if (type(z) is not type([1])) and (type(z) is not type(np.array(1))):
        x = np.array([z])
    else:
        x = np.array(z)
    return np.piecewise(x, [x < 0, (x < 10) & (x > 0), (x >= 10) & (x <= 20), x > 20], [0, lambda x: x, lambda x: - x + 20, 0])

In [168]:
triangle([1])


Out[168]:
array([1])

In [169]:
x = np.arange(-100,100,.1)
plt.figure()
plt.plot(x, triangle(x))
plt.show()



In [170]:
type(np.array([1]))


Out[170]:
numpy.ndarray

In [174]:
integrate.quad(triangle, -np.Inf, np.Inf)


Out[174]:
(100.00000004456021, 8.648601692584634e-07)

In [178]:
vec = metropolis_hastings(lambda f: 1/100*triangle(x), nsamples=10000)

In [181]:
x = np.arange(0,100,.1)
y = 1/100 * triangle(x)
plt.subplot(211)
plt.title('Metropolis-Hastings')
plt.plot(vec)
plt.subplot(212)

plt.hist(vec, bins=30,normed=1)
plt.plot(x,y,'ro')
plt.ylabel('Frequency')
plt.xlabel('x')
plt.xlim((-30,30))
plt.legend(('PDF','Samples'))
plt.show()



In [184]:
import pymc

In [185]:
@pymc.stochastic(dtype=int)
def switchpoint(value=1900, t_l=1851, t_h=1962):
    """The switchpoint for the rate of disaster occurrence."""
    if value > t_h or value < t_l:
        # Invalid values
        return -np.inf
    else:
        # Uniform log-likelihood
        return -np.log(t_h - t_l + 1)

In [189]:
from scipy.stats import rv_discrete
from scipy import stats
normdiscrete = stats.rv_discrete(values=(gridint, np.round(probs, decimals=7)), name='normdiscrete')
n_sample = 500
np.random.seed(87655678)   # fix the seed for replicability
rvs = normdiscrete.rvs(size=n_sample)
rvsnd = rvs
f, l = np.histogram(rvs, bins=gridlimits)
sfreq = np.vstack([gridint, f, probs*n_sample]).T
print sfreq


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-189-b50054185a3f> in <module>()
      1 from scipy.stats import rv_discrete
      2 from scipy import stats
----> 3 normdiscrete = stats.rv_discrete(values=(gridint, np.round(probs, decimals=7)), name='normdiscrete')
      4 n_sample = 500
      5 np.random.seed(87655678)   # fix the seed for replicability

NameError: name 'gridint' is not defined

In [219]:
from scipy import stats
class deterministic_gen(stats.rv_continuous):
     def _pdf(self, x):
        return 1/x

In [220]:
deterministic = deterministic_gen(name="deterministic")

In [222]:
deterministic.cdf(np.arange(1, 3, 0.5))


---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
<ipython-input-222-48cd819759f9> in <module>()
----> 1 deterministic.cdf(np.arange(1, 3, 0.5))

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in cdf(self, x, *args, **kwds)
   1342         if any(cond):  # call only if at least 1 entry
   1343             goodargs = argsreduce(cond, *((x,)+args))
-> 1344             place(output,cond,self._cdf(*goodargs))
   1345         if output.ndim == 0:
   1346             return output[()]

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _cdf(self, x, *args)
   1199 
   1200     def _cdf(self, x, *args):
-> 1201         return self.veccdf(x,*args)
   1202 
   1203     def _logcdf(self, x, *args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in __call__(self, *args, **kwargs)
   1570             vargs.extend([kwargs[_n] for _n in names])
   1571 
-> 1572         return self._vectorize_call(func=func, args=vargs)
   1573 
   1574     def _get_ufunc_and_otypes(self, func, args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in _vectorize_call(self, func, args)
   1636                       for _a in args]
   1637 
-> 1638             outputs = ufunc(*inputs)
   1639 
   1640             if ufunc.nout == 1:

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _cdf_single_call(self, x, *args)
   1196 
   1197     def _cdf_single_call(self, x, *args):
-> 1198         return integrate.quad(self._pdf, self.a, x, args=args)[0]
   1199 
   1200     def _cdf(self, x, *args):

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/integrate/quadpack.pyc in quad(func, a, b, args, full_output, epsabs, epsrel, limit, points, weight, wvar, wopts, maxp1, limlst)
    252         args = (args,)
    253     if (weight is None):
--> 254         retval = _quad(func,a,b,args,full_output,epsabs,epsrel,limit,points)
    255     else:
    256         retval = _quad_weight(func,a,b,args,full_output,epsabs,epsrel,limlst,limit,maxp1,weight,wvar,wopts)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/integrate/quadpack.pyc in _quad(func, a, b, args, full_output, epsabs, epsrel, limit, points)
    319             return _quadpack._qagse(func,a,b,args,full_output,epsabs,epsrel,limit)
    320         else:
--> 321             return _quadpack._qagie(func,bound,infbounds,args,full_output,epsabs,epsrel,limit)
    322     else:
    323         if infbounds != 0:

<ipython-input-219-9d04ce8ef423> in _pdf(self, x)
      2 class deterministic_gen(stats.rv_continuous):
      3      def _pdf(self, x):
----> 4         return 1/x

ZeroDivisionError: float division by zero

In [223]:
deterministic.pdf(np.arange(-3, 3, 0.5))


-c:4: RuntimeWarning: divide by zero encountered in divide
Out[223]:
array([-0.33333333, -0.4       , -0.5       , -0.66666667, -1.        ,
       -2.        ,         inf,  2.        ,  1.        ,  0.66666667,
        0.5       ,  0.4       ])

In [224]:
deterministic.rvs(size=10)


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-224-eafdfe3a1ee3> in <module>()
----> 1 deterministic.rvs(size=10)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in rvs(self, *args, **kwds)
    669             return loc*ones(size, 'd')
    670 
--> 671         vals = self._rvs(*args)
    672         if self._size is not None:
    673             vals = reshape(vals, size)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _rvs(self, *args)
   1192         ## Use basic inverse cdf algorithm for RV generation as default.
   1193         U = mtrand.sample(self._size)
-> 1194         Y = self._ppf(U,*args)
   1195         return Y
   1196 

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _ppf(self, q, *args)
   1211 
   1212     def _ppf(self, q, *args):
-> 1213         return self.vecfunc(q,*args)
   1214 
   1215     def _isf(self, q, *args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in __call__(self, *args, **kwargs)
   1570             vargs.extend([kwargs[_n] for _n in names])
   1571 
-> 1572         return self._vectorize_call(func=func, args=vargs)
   1573 
   1574     def _get_ufunc_and_otypes(self, func, args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in _vectorize_call(self, func, args)
   1636                       for _a in args]
   1637 
-> 1638             outputs = ufunc(*inputs)
   1639 
   1640             if ufunc.nout == 1:

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _ppf_single_call(self, q, *args)
   1154 
   1155         return optimize.brentq(self._ppf_to_solve,
-> 1156                                left, right, args=(q,)+args, xtol=self.xtol)
   1157 
   1158     # moment from definition

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/optimize/zeros.pyc in brentq(f, a, b, args, xtol, rtol, maxiter, full_output, disp)
    413     if rtol < _rtol:
    414         raise ValueError("rtol too small (%g < %g)" % (rtol, _rtol))
--> 415     r = _zeros._brentq(f,a,b,xtol,rtol,maxiter,args,full_output,disp)
    416     return results_c(full_output, r)
    417 

RuntimeError: Failed to converge after 100 iterations.

In [195]:
deterministic?

In [239]:
def getDistribution(data):
    kernel = stats.gaussian_kde(data)
    class rv(stats.rv_continuous):
        def _rvs(self, *x, **y):
            # don't ask me why it's using self._size 
            # nor why I have to cast to int
            return kernel.resample(int(self._size)) 
        def _cdf(self, x):
            return kernel.integrate_box_1d(-numpy.Inf, x)
        def _pdf(self, x):
            return kernel.evaluate(x)
    return rv(name='kdedist')

In [240]:
f = getDistribution(np.random.randn(100))

In [243]:
f.rvs([1])


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-243-d05ef689c9d3> in <module>()
----> 1 f.rvs([1])

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in rvs(self, *args, **kwds)
    669             return loc*ones(size, 'd')
    670 
--> 671         vals = self._rvs(*args)
    672         if self._size is not None:
    673             vals = reshape(vals, size)

<ipython-input-239-8854fe964a63> in _rvs(self, *x, **y)
      5             # don't ask me why it's using self._size
      6             # nor why I have to cast to int
----> 7             return kernel.resample(int(self._size))
      8         def _cdf(self, x):
      9             return kernel.integrate_box_1d(-numpy.Inf, x)

TypeError: int() argument must be a string or a number, not 'NoneType'

In [231]:
def getDistribution(data):
    kernel = stats.gaussian_kde(data)
    class rv(stats.rv_continuous):
        def _cdf(self, x):
            return kernel.integrate_box_1d(-numpy.Inf, x)
        def _pdf(self, x):
            return kernel.evaluate(x)
    return rv(name='kdedist')


Out[231]:
array([ 0.21934769])

In [244]:
kernel = stats.gaussian_kde(np.random.randn(100))

In [245]:
kernel


Out[245]:
<scipy.stats.kde.gaussian_kde at 0x10b0f2a90>

In [246]:
type(kernel)


Out[246]:
scipy.stats.kde.gaussian_kde

In [247]:
kernel.resample(10)


Out[247]:
array([[ 0.73375879, -1.45096952,  0.50167046, -0.27989526, -0.5990068 ,
        -0.35663271, -2.82007892,  0.59557902, -0.71221009,  0.40275279]])

In [256]:
def getDistribution():
    f = np.random.normal
    class rv(stats.rv_continuous):
        def _pdf(self, x):
            return f(x)
    return rv(name='kdedist')

In [257]:
a = getDistribution()

In [262]:
a.rvs(size=10)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-262-a96fcc4f0fde> in <module>()
----> 1 a.rvs(size=10)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in rvs(self, *args, **kwds)
    669             return loc*ones(size, 'd')
    670 
--> 671         vals = self._rvs(*args)
    672         if self._size is not None:
    673             vals = reshape(vals, size)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _rvs(self, *args)
   1192         ## Use basic inverse cdf algorithm for RV generation as default.
   1193         U = mtrand.sample(self._size)
-> 1194         Y = self._ppf(U,*args)
   1195         return Y
   1196 

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _ppf(self, q, *args)
   1211 
   1212     def _ppf(self, q, *args):
-> 1213         return self.vecfunc(q,*args)
   1214 
   1215     def _isf(self, q, *args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in __call__(self, *args, **kwargs)
   1570             vargs.extend([kwargs[_n] for _n in names])
   1571 
-> 1572         return self._vectorize_call(func=func, args=vargs)
   1573 
   1574     def _get_ufunc_and_otypes(self, func, args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in _vectorize_call(self, func, args)
   1636                       for _a in args]
   1637 
-> 1638             outputs = ufunc(*inputs)
   1639 
   1640             if ufunc.nout == 1:

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _ppf_single_call(self, q, *args)
   1154 
   1155         return optimize.brentq(self._ppf_to_solve,
-> 1156                                left, right, args=(q,)+args, xtol=self.xtol)
   1157 
   1158     # moment from definition

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/optimize/zeros.pyc in brentq(f, a, b, args, xtol, rtol, maxiter, full_output, disp)
    413     if rtol < _rtol:
    414         raise ValueError("rtol too small (%g < %g)" % (rtol, _rtol))
--> 415     r = _zeros._brentq(f,a,b,xtol,rtol,maxiter,args,full_output,disp)
    416     return results_c(full_output, r)
    417 

ValueError: f(a) and f(b) must have different signs

In [264]:
import scipy.integrate as integrate
result = integrate.quad(lambda x: np.random.normal(x), 0, 4.5)